/****

### Step 13: Create Joint Venture Data

  #### Part 1 - Stata:** `2_6_Joint_Venture_Data.do` 
  **Language:** Stata  
  **Description:** Starts from raw SDC joint venture data and creates main sample
  - **Stata Part (First):**
    - **Inputs:** SDC_joint_ventures, gvkey_cusip_links
    - **Outputs:** JV_cleaned_short


****/
global dir path
cd "$dir"

set processors 4

*** Clean SDC joint ventures:
*** SDC_joint_ventures: Raw Dataset downloaded from SDC on Joint Ventures. 

use "SDC_joint_ventures.dta", clear
	drop if pcusip==""
	gisid master_deal_no sequence_no // YES, unique
	destring master_deal_no, replace
	format master_deal_no %20.0f

*** Filters
	* Keep only US deals
	keep if pnationcode=="US"
	sort master_deal_no sequence_no
	replace statcode=statcode[_n-1] if master_deal_no==master_deal_no[_n-1] & sequence_no==sequence_no[_n-1]+1
	distinct master_deal_no  
	tab statcode

	* Keep only deals that have 2 or more members
	egen min_seq=min(sequence_no), by(master_deal_no)
	egen max_seq=max(sequence_no), by(master_deal_no)
	keep if min_seq==1 & max_seq>1
	drop min_seq
	rename max_seq num_members

	sort master_deal_no sequence_no
	global id_vars master_deal_no sequence_no wrds_first_deal_date pcusip statcode num_members 
	global vars_chr  jv research supply manufacturing license marketing funding
	global vars_num  sales exp_length hold
	order  $id_vars $vars_chr $vars_num
	br     $id_vars $vars_chr $vars_num
	keep 	 $id_vars $vars_chr $vars_num
	gen year=year(wrds_first_deal_date)

*** Fix the main variables
	foreach x in $vars_chr  {
		replace `x'=`x'[_n-1] if master_deal_no==master_deal_no[_n-1] & sequence_no==sequence_no[_n-1]+1
		replace `x'="N" if `x'=="No"
		replace `x'="Y" if `x'=="Yes"
		tab `x'
	}
	*** TEMP SAVE TO GET REMERGED
	preserve
	keep master_deal_no pcusip
	duplicates drop
	save "temp_JV", replace
	restore 
	***
	isid master_deal_no sequence_no
	rename pcusip cusip1
	count
	joinby master_deal_no using "temp_JV.dta", unmatched(master) _merge(_merge_pairs)
	count
	rename pcusip cusip2
	drop if cusip1 == cusip2
	drop _merge_pairs
	sort 	 master_deal_no sequence_no
	order  master_deal_no sequence_no num_members wrds_first_deal_date cusip1 cusip2 statcode  $vars_chr $vars_num
	br     master_deal_no sequence_no num_members wrds_first_deal_date cusip1 cusip2 statcode  $vars_chr $vars_num
	by master_deal_no, sort:  gen sequence_no2 = _n
	isid master_deal_no sequence_no2 // The new Unique ID


	*** DELETE temporary dataset:
	erase "temp_JV.dta"

*** LINK FOR CUSIPS

*** ------------------------- *** 
gen cusip6 = cusip1
	count
   joinby cusip6 using "gvkey_cusip_links", unmatched(none) _merge(_merge_gvkeys1)
	rename gvkey gvkey1
	keep if inrange(wrds_first_deal_date , startdate, enddate)
	drop startdate enddate cusip6

gen cusip6 = cusip2
	count
	joinby cusip6 using "gvkey_cusip_links", unmatched(none) _merge(_merge_gvkeys2)
	rename gvkey gvkey2
	keep if inrange(wrds_first_deal_date , startdate, enddate)
	drop startdate enddate cusip6
count
	drop _merge_gvkeys1 _merge_gvkeys2

	order gvkey1 gvkey2 year 
	sort gvkey1 year gvkey2
	gen alliance = 1 
	sort master_deal_no gvkey1 year gvkey2
	duplicates drop gvkey1 year gvkey2, force
	isid gvkey1 year gvkey2

count
compress
save "JV_cleaned", replace

preserve
	keep gvkey1 year gvkey2 alliance
	duplicates drop
saveold "JV_cleaned_short", replace
restore 

preserve
	keep gvkey1 gvkey2 alliance
	duplicates drop
save "JV_cleaned_short_XS", replace
restore 

use "JV_cleaned_short", clear

*** SWITCH TO SAS FILE: '2_Joint_Venture_Data(SAS).sas' (You can see the SAS code below)

***************************************************************************************************
*** To run this, you first run the Stata file: 2_Joint_Venture_Data.do

*** Combine the Supply Chain Network with the joint venture data ;
/* 
proc import datafile= "SCnetwork2.csv"
     out= SCnetwork2
     dbms=csv
     replace;
     getnames=yes;
run;
proc import datafile= "JV_cleaned_short.dta"
     out= JV_cleaned_short
     dbms=DTA
     replace;
run;
* Combine the two networks; 
	 proc sql;
	    create table SC_alliances as
	    select distinct a.*, max(b.alliance) as alliance 
	    from SCnetwork2 as a 
	    	left join JV_cleaned_short as b on a.gvkey1=b.gvkey1 and a.gvkey2=b.gvkey2 and b.year between a.fyear-1 and a.fyear+1
	    group by a.gvkey1, a.fyear, a.gvkey2
	    order by a.gvkey1, a.fyear, a.gvkey2
		;
	quit; 
	proc export data = SC_alliances
				file = "SC_alliances.dta"
				dbms = STATA REPLACE;
	run;

	proc sql; 
	select sum(alliance) as alliances 
	from SC_blocks; 
	quit; */
***************************************************************************************************/

/*** Then Come BACK AND RUN THIS: ***/

use "SC_alliances", clear // Created in SAS
	gen common_alliance = (alliance!=.)
	*** COLLAPSE AT THE FIRM LEVEL:
	egen common_alliance_max   = max(common_alliance), by(gvkey1 fyear)
	egen common_alliance_max_S = max(common_alliance / (supplier==1)), by(gvkey1 fyear)
	egen common_alliance_max_C = max(common_alliance / (supplier==0)), by(gvkey1 fyear)
    sum  common_alliance*
	rename gvkey1 gvkey 
	keep gvkey fyear common_alliance_max common_alliance_max_S common_alliance_max_C 
	duplicates drop
save "SC_alliances_collapsed", replace